This code is aimed at guiding the reader through the analysis process followed to obtain the results shown in our paper. All code necessary to reproduce our results is presented here. To access the data and other files necessary to reproduce our analysis please head to our GitHub or OSF repositories.
Prerequisites:
The following packages are needed to employ this R Markdown document and can be easily installed from CRAN by uncommenting the install.packages function below:
# install.packages(c("readr", "tidyverse", "lme4", "rmcorr", "effects",
# "coefplot", "arm", "RColorBrewer", "reshape2", "gridExtra",
# "rmarkdown", "gt", "cowplot", "RSQLite", "BaSTA",
# "snowfall", "kableExtra", "lubridate", "MuMIn", "GGally",
# "performance", "magick", "ggpubr", "extrafont", "jpeg",
# "grid", "standardize", "performance", "broom"))
library(readr)
library(tidyverse)
library(lme4)
library(rmcorr)
library(effects)
library(coefplot)
library(arm)
library(RColorBrewer)
library(reshape2)
library(gridExtra)
library(rmarkdown)
library(gt)
library(cowplot)
library(RSQLite)
library(BaSTA)
library(snowfall)
library(kableExtra)
library(lubridate)
library(MuMIn)
library(GGally)
library(performance)
library(magick)
library(ggpubr)
library(extrafont)
library(jpeg)
library(grid)
library(standardize)
library(performance)
library(broom)Firstly, connect to the CeutaOPEN SQLite database (open-access between 2006 to 2016; for more details, see: Eberhart-Phillips, L.J., Cruz-López, M., Lozano-Angulo, L. et al. CeutaOPEN, individual-based field observations of breeding snowy plovers Charadrius nivosus. Scientific Data 7, 149 (2020). https://doi.org/10.1038/s41597-020-0490-y).
Our raw field data collected between 2017 and 2019 are not yet publicly available, but the data in these years that are needed to reproduce the results shown in our manuscript are provided here in the BaSTA_checked_life_table_females.rds (for Part 1 “BaSTA model selection”) and the egg_volume_data_2006_2019.rds (for Parts 2 and 3) files found in the data folder included in the project’s OSF repository.
# open a connection to the CeutaOPEN SQLite database (v1.5)
Ceuta_OPEN <-
dbConnect(SQLite(), dbname = "data/Ceuta_OPEN_v1-5.sqlite")Wrangle the capture data
# build the female snowy plover capture table with recruitment status
captures <-
# extract the capture datatable
dbReadTable(Ceuta_OPEN,"Captures") %>%
# subset to only include snowy plovers
filter(species == "SNPL") %>%
# group by individual
group_by(ring) %>%
# determine the first year that an individual was seen
# (for juvenile rows, the row's own year; otherwise the earliest year on record)
mutate(originalsight = ifelse(age == "J", year,
year[which.min(as.numeric(year))])) %>%
# determine if the first encounter was as an adult or a juvenile
mutate(chick = age[which.min(as.numeric(year))]) %>%
# specify first encounters as juveniles as recruits and adults as immigrants
mutate(recruit = ifelse(chick == "J", "Recruit", "Immigrant")) %>%
# if a recruit then specify the year at which it was first encountered (i.e., its birth year); 0 for immigrants
mutate(birth = ifelse(recruit == "Recruit", originalsight, "0")) %>%
# remove 13 rows that do not have ring information (i.e., these are "observation only" captures of XX.XX|XX.XX birds)
filter(ring != "NA") %>%
# remove all individuals that are of unknown sex status
# NOTE(review): this filter is redundant given the subset to females below
filter(sex != "U") %>%
# subset to females
filter(sex == "F")Assess the sample size of the capture population
# assess how many individuals we have: Female = 807
# (count unique rings per sex and render as a scrollable kable table)
captures %>%
group_by(sex) %>%
summarise(count = n_distinct(ring)) %>%
collect() %>%
kable(col.names = c("Sex",
"Number of individuals")) %>%
kable_styling() %>%
scroll_box(width = "50%")| Sex | Number of individuals |
|---|---|
| F | 807 |
# assess how many immigrants and recruits we have:
# Immigrant Females = 359, Recruit Females = 448
captures %>%
group_by(recruit, sex) %>%
summarise(n_inds = n_distinct(ring)) %>%
collect() %>%
kable(col.names = c("Status",
"Sex",
"Number of individuals")) %>%
kable_styling() %>%
scroll_box(width = "50%")| Status | Sex | Number of individuals |
|---|---|---|
| Immigrant | F | 359 |
| Recruit | F | 448 |
Wrangle the resight data
# build the resight table, restricted to unambiguous colour-ring combinations
resightings <-
# extract the resight datatable
dbReadTable(Ceuta_OPEN,"Resights") %>%
# subset to only include snowy plovers
filter(species == "SNPL") %>%
# merge the ring identity to the resight codes
left_join(., dplyr::select(captures, ring, code), by = "code") %>%
# remove duplicates
distinct()
# extract combinations that are unique
unique_combos <-
resightings %>%
# remove all combos without a distinct set of rings (i.e., needs at most 4 X's)
filter(str_count(code, "X") < 5) %>%
# remove all combos with uncertainty (i.e., no ?'s)
filter(str_detect(code, "\\?", negate = TRUE)) %>%
# remove all combos without the appropriate number of rings
filter(nchar(code) == 11) %>%
# extract the combos that have only one metal ring associated with them
group_by(code) %>%
summarise(n_obs = n_distinct(ring)) %>%
arrange(desc(n_obs)) %>%
filter(n_obs == 1)
resightings <-
resightings %>%
# subset resightings to only include observations of unique combinations
filter(code %in% unique_combos$code) %>%
# remove all combos that don't have a ring associated with them
filter(!is.na(ring)) %>%
# sort by ring and year to assess the output
arrange(ring, year)Tidy up capture and resight data
# captures tagged with observation type and trimmed to relevant columns
capture_final <-
captures %>%
# paste ring and year together to make a unique identifier for this observation
unite(ring_year, ring, year, remove = FALSE) %>%
# specify as a capture
mutate(observation = "capture") %>%
# clean up output
dplyr::select(ring_year, sex, ring, year, recruit, birth, observation) %>%
arrange(ring, year)
# resights tagged with observation type, with recruit/birth/sex merged from captures
resightings_final <-
resightings %>%
# paste ring and year together to make a unique identifier for this observation
unite(ring_year, ring, year, remove = FALSE) %>%
# specify as a resight
mutate(observation = "resight") %>%
# clean up output
dplyr::select(ring_year, ring, year, observation) %>%
# join with the capture data to merge recruit status, sex, and birth year
left_join(., dplyr::select(capture_final, ring, recruit, birth, sex), by = "ring") %>%
# remove duplicates
distinct()Bind capture and resight data into a complete encounter history
# stack captures and resights into a single encounter history
encounter_histories <-
bind_rows(capture_final, resightings_final) %>%
arrange(ring_year)Remove resight encounters that occurred prior to the first capture
# determine the year of first capture for each individual
first_cap <-
capture_final %>%
group_by(ring) %>%
filter(as.numeric(year) == min(as.numeric(year))) %>%
dplyr::select(ring, year) %>%
# one row per ring (keeps the first row when several captures share the first year)
distinct(ring,.keep_all = TRUE) %>%
rename(first_cap = year) %>%
arrange(first_cap)
# determine which resights occurred before the first capture
resights_before_first_capture <-
encounter_histories %>%
left_join(., first_cap, by = "ring") %>%
filter(observation == "resight" & (as.numeric(year) < as.numeric(first_cap)))
# Negated membership operator: TRUE where an element of x is absent from table.
`%!in%` <- function(x, table) {
  !(x %in% table)
}
# exclude early resightings from encounter history
# (drops any resight observation dated before the individual's first capture)
encounter_histories <-
encounter_histories %>%
filter(ring_year %!in% resights_before_first_capture$ring_year) %>%
arrange(ring, as.numeric(year))Make the encounter history table needed for the survival analysis
# convert yearly encounters to a BaSTA capture-history matrix (one column per year)
encounter_history_table <-
distinct(encounter_histories, ring_year, .keep_all = TRUE) %>%
dplyr::select(ring, year) %>%
arrange(ring) %>%
mutate(year = as.integer(year)) %>%
CensusToCaptHist(ID = .$ring,
d = .$year,
timeInt = "Y") %>%
mutate(ring = rownames(.),
ID = as.character(ID))Extract the known birth and death information for each individual
# known birth year per individual (0 = unknown/immigrant); death year always unknown (0)
birth_death_mat <-
encounter_histories %>%
dplyr::select(ring, birth) %>%
mutate(death = 0) %>%
arrange(ring) %>%
distinct(ring, .keep_all = TRUE)Unite the encounter history table with the birth and death matrices
life_table <-
left_join(birth_death_mat, encounter_history_table, by = "ring") %>%
as.data.frame() %>%
distinct() %>%
dplyr::select(ID, birth, death,
"2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013",
"2014", "2015", "2016", "2017",
"2018", "2019", ring) %>%
mutate_at(vars(-ring), as.numeric) %>%
# columns 4:17 are the 2006-2019 detection columns
mutate(sum_years = rowSums(.[4:17])) %>%
# keep immigrants (birth == 0) regardless; locally hatched birds must have
# been encountered in more than one year
filter(sum_years > 1 | birth == 0) %>%
mutate(ID = as.numeric(row.names(.))) %>%
dplyr::select(-sum_years)Summarise the encounter history sampling distribution
# summarise per-individual encounter counts (columns 4:17 = year columns 2006-2019)
life_table %>%
mutate(sum_years = rowSums(.[4:17])) %>%
arrange(desc(sum_years)) %>%
summarise(avg_obs = mean(sum_years),
sd_obs = sd(sum_years),
med_obs = median(sum_years),
min_obs = min(sum_years),
max_obs = max(sum_years)) %>%
collect() %>%
kable(col.names = c("Avg. no. encounters",
"SD no. encounters",
"Med. no. encounters",
"Min. no. encounters",
"Max. no. encounters")) %>%
kable_styling() %>%
scroll_box(width = "100%")| Avg. no. encounters | SD no. encounters | Med. no. encounters | Min. no. encounters | Max. no. encounters |
|---|---|---|---|---|
| 2.13 | 1.492874 | 2 | 1 | 12 |
Run the “DataCheck” BaSTA function to make the final cleaning adjustments to the encounter history. The only change needed is that the birth year of recruits should be ‘0’ instead of ‘1’; this function will solve this issue and provide a cleaned version ready for analysis. In total, we have 811 encounters of 400 individually marked females that were seen as adults, of which 41 are of known birth year.
# validate the capture-recapture matrix with BaSTA's DataCheck, dropping the
# trailing `ring` column (the last column) first; autofix = rep(1, 7) requests
# automatic fixing for each of BaSTA's data checks (see ?DataCheck)
# NOTE(review): the `_final` suffix here does not match the
# `BaSTA_checked_life_table_females` object used by multibasta below — confirm
# which object is intended
BaSTA_checked_life_table_females_final <-
DataCheck(object = life_table[, -c(length(life_table))],
studyStart = 2006, studyEnd = 2019,
autofix = rep(1, 7), silent = FALSE)## The following rows have a one in the recapture matrix in the birth year:
## [1] 35 42 55 77 78 109 110 114 124 127 131 133 134 149 152 153 166
## [18] 170 177 181 190 193 201 202 210 215 221 226 242 248 249 250 263 270
## [35] 296 297 304 308 310 312 335
## *DataSummary*
## - Number of individuals = 400
## - Number with known birth year = 41
## - Number with known death year = 0
## - Number with known birth
## AND death years = 0
##
## - Total number of detections
## in recapture matrix = 811
##
## - Earliest detection time = 2006
## - Latest detection time = 2019
## - Earliest recorded birth year = 2006
## - Latest recorded birth year = 2016
Run the multibasta function to fit all possible survival trends to the data, while specifying the study start year at 2006, study end year at 2019, 800000 iterations, burnin of 10000, thinning every 2000th iteration, 4 parallel chains, and a minimum age of 1 (i.e., our mark-recapture data only includes individuals that are either adults of unknown age (age >= 1), or individuals that were born locally but were encountered in subsequent years (i.e., first-year survival = 1.0)).
# fit all candidate BaSTA mortality functions/shapes in parallel
# NOTE(review): `BaSTA_checked_life_table_females` is not defined above — the
# DataCheck output was assigned to `BaSTA_checked_life_table_females_final`;
# presumably this object is loaded from BaSTA_checked_life_table_females.rds
# (see the data description above) — confirm before running
multiout_females <-
multibasta(object = BaSTA_checked_life_table_females$newData,
studyStart = 2006, studyEnd = 2019, covarsStruct = "fused",
minAge = 1, niter = 800000, burnin = 10000, thinning = 2000,
nsim = 4, parallel = TRUE, ncpus = 4, updateJumps = TRUE)DIC model selection shows that the Logistic mortality model with bathtub shape fits our data the best
| Mortality function | Shape | k | ΔDIC |
|---|---|---|---|
| Logistic | Bathtub | 7 | 0.00 |
| Logistic | Simple | 4 | 73.17 |
| Logistic | Makeham | 5 | 74.28 |
| Weibull | Bathtub | 6 | 101.22 |
| Weibull | Simple | 3 | 160.15 |
| Weibull | Makeham | 4 | 177.54 |
| Gompertz | Bathtub | 6 | 288.96 |
| Gompertz | Simple | 3 | 317.94 |
| Gompertz | Makeham | 4 | 363.84 |
| Exponential | Simple | 2 | 518.73 |
Extract top model
# extract the top-ranked model from the multibasta runs
# (presumably runs[[1]] is the DIC-best Logistic bathtub model — confirm ordering)
plover_survival_model <-
multiout_females$runs[[1]]Check the diagnostics to assess the simulation performance
Serial autocorrelation: values should hover around zero…good! Convergence: values should hover around one…good!
Trace plot: should look like a hairy caterpillar instead of a snake…good!
Plot age-specific female survival probability and mortality rate for the Logistic mortality model with bathtub shape.
Wrangle the estimated birth year and age-at-death for each individual given the BaSTA output
# posterior quantiles of each individual's estimated birth year
BaSTA_births <-
t(plover_survival_model$birthQuant) %>%
as.data.frame() %>%
mutate(ID = row.names(.)) %>%
rename(est_b = `50%`,
upper_b = `97.5%`,
lower_b = `2.5%`)
# posterior quantiles of each individual's estimated age at death
# NOTE(review): the first column of agesQuant is unnamed (V1) — presumably the
# median (50%) estimate; confirm against the BaSTA output structure
BaSTA_death_ages <-
t(plover_survival_model$agesQuant) %>%
as.data.frame() %>%
mutate(ID = row.names(.)) %>%
rename(est_d = V1,
upper_d = `97.5%`,
lower_d = `2.5%`)
# link BaSTA's numeric IDs back to ring identities and attach both quantile sets
BaSTA_ages <-
life_table %>%
dplyr::select(ID, ring) %>%
mutate(ID = as.character(ID)) %>%
left_join(., BaSTA_births, by = "ID") %>%
left_join(., BaSTA_death_ages, by = "ID") %>%
dplyr::select(-ID)Here we extract a subset of the population that has at least 3 years of repeated measures of egg volume and merge the estimated ages of the BaSTA analysis. Then we use a mixed-model framework to separate population-average and within-individual age effects, while controlling for selective appearance and disappearance. Finally, we conduct a posthoc peak-performance analysis to identify at which age egg volume starts to senesce.
# join relevant capture data with nest data and subset to captures of adult females
nest_caps_F <-
dbReadTable(Ceuta_OPEN, "Captures") %>%
dplyr::select("ID", "ring", "sex", "age", "date") %>%
left_join(., dbReadTable(Ceuta_OPEN, "Nests"), by = "ID") %>%
filter(sex == "F" & age == "A")
# standardize the lay date information
nest_caps_F <-
nest_caps_F %>%
# set dates as numeric
mutate(nest_initiation_date = as.numeric(nest_initiation_date),
end_date = as.numeric(end_date),
found_date = as.numeric(found_date)) %>%
# NOTE(review): nest_initiation_date was coerced to numeric above, so this
# comparison with the string "NANA" (a paste artifact of two NAs) should
# never match here — verify it is still needed
mutate(date = ifelse(nest_initiation_date == "NANA",
NA, nest_initiation_date)) %>%
mutate(date = as.numeric(ifelse(!is.na(date), date,
ifelse(!is.na(found_date), found_date, date))),
year = as.factor(year)) %>%
# change to as.Date format
plover_date_convert() %>%
# specify the lay date as the nest initiation date. If this is unknown, then
# subtract 25 days from the end date if the nest hatched. If this isn't the case,
# then subtract 11 days from the found date if the float score of the first
# egg is "F". If this isn't the case, then take the found date.
mutate(lay_date =
as.Date(
ifelse(!is.na(nest_initiation_date), nest_initiation_date,
ifelse((!is.na(end_date) & fate == "Hatch"), end_date - 25,
ifelse((!is.na(found_date) & float1 == "F"), found_date - 11,
ifelse(!is.na(found_date), found_date, NA)))),
origin = "1970-01-01")) %>%
# NOTE(review): this second mutate overwrites the back-calculated lay_date
# from the step directly above with found_date for every nest, discarding
# the nest_initiation_date/hatch-based estimates — confirm this is intentional
mutate(lay_date =
as.Date(ifelse(!is.na(found_date), found_date, NA),
origin = "1970-01-01")) %>%
# create a julian lay date
mutate(jul_lay_date = as.numeric(format(lay_date, "%j"))) %>%
# remove any duplicated rows resulting from joining above
distinct() %>%
mutate(
# scale the julian lay date by year
jul_std_date = scale_by(jul_lay_date ~ year, .)) %>%
# remove any rows without lay date information
filter(!is.na(jul_std_date))
# extract females that have non-NA egg dimensions and have at least 3 repeated measures
# (n_measures counts distinct YEARS with measured first eggs, not clutches)
n_years_3 <-
nest_caps_F %>%
filter(!is.na(width1) & !is.na(length1)) %>%
group_by(ring) %>%
summarise(n_measures = n_distinct(year)) %>%
filter(n_measures > 2)
# subset to females that have non-NA egg dimensions and have at least 3 repeated measures
# and specify lay date based on nest_initiation_date if available (if not, then take
# found_date. Note: for eggs found at float stage F that didn't hatch, it is impossible
# to back-calculate the lay date, and thus the found_date is the best we have available)
nest_caps_F_3 <-
nest_caps_F %>%
filter(ring %in% n_years_3$ring) %>%
dplyr::select(c(ID, ring, year, jul_lay_date, jul_std_date,
width1, length1,
width2, length2,
width3, length3))
# Helper: pull the i-th egg's dimensions into a common long format.
# Returns ID/ring/year/lay-date columns plus generic `width`/`length` columns
# and an `egg` label ("egg1", "egg2", or "egg3"). Consolidates the previously
# triplicated copy-paste select/rename/mutate logic into one definition so the
# three extractions cannot drift apart.
extract_egg_dims <- function(df, i) {
  df %>%
    dplyr::select(ID, ring, year, jul_lay_date, jul_std_date,
                  width = all_of(paste0("width", i)),
                  length = all_of(paste0("length", i))) %>%
    mutate(egg = paste0("egg", i))
}
# extract dimensions of egg 1
nest_caps_F_3_egg1 <- extract_egg_dims(nest_caps_F_3, 1)
# extract dimensions of egg 2
nest_caps_F_3_egg2 <- extract_egg_dims(nest_caps_F_3, 2)
# extract dimensions of egg 3
nest_caps_F_3_egg3 <- extract_egg_dims(nest_caps_F_3, 3)
# bind all egg measurements and remove NA observations
eggdf <-
bind_rows(nest_caps_F_3_egg1,
nest_caps_F_3_egg2,
nest_caps_F_3_egg3) %>%
filter(!is.na(width) | !is.na(length)) %>%
# calculate egg volume
# (0.486 * length * width^2 — Hoyt-type volume formula; confirm coefficient source)
mutate(eggv = 0.486 * length * width^2,
year = as.integer(as.character(year))) %>%
# sort by ring and nest ID and remove duplicated rows
arrange(ring, ID) %>%
distinct() %>%
# remove nest 2017_C_6 (parentage unclear)
filter(ID != "2017_C_6") %>%
# remove CN0232 and CA2334 (only two years of data after removal of 2017_C_6)
filter(ring != "CN0232" & ring != "CA2334") %>%
# merge with basta age estimates
left_join(., BaSTA_ages, by = "ring") %>%
mutate(
# calculate the estimated age at a given year
est_age = year - est_b,
# calculate upper and lower 95% CI for age at a given year
est_age_lower = year - upper_b,
est_age_upper = year - lower_b,
# NOTE(review): est_death_age takes upper_d (the 97.5% quantile of age at
# death), not the median — confirm this is intentional
est_death_age = upper_d) %>%
# remove duplicate rows and extraneous columns
distinct()
# Determine the age at first capture for females in the data
age_at_first_cap_info <-
dbReadTable(Ceuta_OPEN, "Captures") %>%
filter(ring %in% eggdf$ring) %>%
plover_date_convert() %>%
group_by(ring) %>%
summarise(age_first_cap = age[which.min(date)],
year_first_cap = as.numeric(year[which.min(year)])) %>%
collect() %>%
# manually assign J to CA2036 and CA1526 which were captured as chicks during
# the small study in 2004
mutate(age_first_cap = ifelse(ring %in% c("CA2036", "CA1526"), "J", age_first_cap))
# join the age at first capture info to the data
eggdf <-
left_join(eggdf, age_at_first_cap_info,
by = "ring") %>%
# for birds first caught as juveniles, use the BaSTA birth-year estimate as
# the first-capture year; conservative_age counts years since first capture
mutate(year_first_cap = ifelse(age_first_cap == "J", est_b, year_first_cap)) %>%
mutate(conservative_age = year - year_first_cap)
# join polyandry information
# (yearly mate counts per female; n_mates == 0 is recoded to 1, i.e. an unknown mate)
eggdf_mating_sys_all <-
dbReadTable(Ceuta_OPEN, "BirdRef") %>%
dplyr::select(year, ID, male, female) %>%
filter(female %in% eggdf$ring) %>%
arrange(female) %>%
group_by(female, year) %>%
summarise(n_mates = n_distinct(male, na.rm = TRUE)) %>%
mutate(polyandry = ifelse(n_mates > 1, "poly", "mono"),
year = as.integer(year),
n_mates = ifelse(n_mates == 0, 1, n_mates)) %>%
rename(ring = female)
# female-years with mating records but no egg measurements in eggdf
matings_without_egg_obs <-
anti_join(eggdf_mating_sys_all, eggdf,
by = c("ring", "year")) %>%
left_join(., dplyr::select(eggdf, ring, age_first_cap, est_b), by = "ring") %>%
mutate(est_age = year - est_b) %>%
distinct()
eggdf <-
left_join(eggdf, eggdf_mating_sys_all,
by = c("ring", "year"))Mean number of mates per year per individual
# Return the modal (most frequent) value of a vector; ties resolve to the
# value that appears first in the vector.
getmode <- function(v) {
  distinct_vals <- unique(v)
  freq <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(freq)]
}
# average number of mates per year
# (per-female mean/median mate count and modal mating strategy across years)
eggdf_mating_sys_summary <-
eggdf_mating_sys_all %>%
group_by(ring) %>%
summarise(mean_n_mates_per_year = mean(n_mates),
median_n_mates_per_year = median(n_mates),
modal_mating_strategy = getmode(polyandry))
# grand mean mates per female per year
mean(eggdf_mating_sys_summary$mean_n_mates_per_year)## [1] 1.240079
# grand median mates per female per year
median(eggdf_mating_sys_summary$median_n_mates_per_year)## [1] 1
# boxplot of mates per female per year
boxplot(eggdf_mating_sys_summary$mean_n_mates_per_year)This plot visualizes the 51 females used in our senescence analysis. The thin grey lines indicate the age period for which we have encountered an individual (note that the starting age for each individual is based on the mean estimate of an individual’s birth year estimated from the BaSTA package above). The thick light grey lines in the back show the lower 95% CI of the BaSTA age of first encounter. The black points show the data used in our analysis, with the size of each point corresponding to the number of clutches laid by a given female in a given year.
| Age at first encounter | Frequency of individuals |
|---|---|
| A | 41 |
| J | 10 |
| Age at first egg measure | Frequency of individuals |
|---|---|
| 1 | 20 |
| 2 | 20 |
| 3 | 1 |
| Average interval | SD interval | Median interval |
|---|---|---|
| 1.169444 | 0.3048664 | 1 |
| Average tenure | SD tenure | Min. tenure | Max. tenure |
|---|---|---|---|
| 5.196078 | 2.289276 | 3 | 13 |
| Clutch size | Frequency of nests |
|---|---|
| 1 | 2 |
| 2 | 31 |
| 3 | 237 |
| Number of years | Frequency of individuals |
|---|---|
| 3 | 32 |
| 4 | 6 |
| 5 | 7 |
| 6 | 4 |
| 7 | 1 |
| 8 | 1 |
| Number of nests | Frequency of individuals |
|---|---|
| 3 | 8 |
| 4 | 15 |
| 5 | 10 |
| 6 | 8 |
| 7 | 3 |
| 8 | 1 |
| 9 | 2 |
| 10 | 3 |
| 11 | 1 |
| Sample size | |
|---|---|
| Years | 13 |
| Individuals | 51 |
| Nests | 270 |
| Eggs | 775 |
| Age | Observations (i.e., Eggs) | Nests | Individuals |
|---|---|---|---|
| 2 | 98 | 34 | 27 |
| 3 | 176 | 62 | 44 |
| 4 | 174 | 60 | 43 |
| 5 | 118 | 42 | 30 |
| 6 | 84 | 29 | 19 |
| 7 | 61 | 21 | 15 |
| 8 | 33 | 11 | 7 |
| 9 | 19 | 7 | 5 |
| 10 | 6 | 2 | 2 |
| 11 | 3 | 1 | 1 |
| 14 | 3 | 1 | 1 |
| value | |
|---|---|
| max_age | 14.000000 |
| min_age | 2.000000 |
| grand_avg_max_age | 14.000000 |
| grand_avg_min_age | 2.000000 |
| grand_avg_avg_age | 4.284469 |
| grand_median_age_span | 4.000000 |
| grand_avg_age_span | 4.431373 |
| max_age_span | 13.000000 |
| min_age_span | 3.000000 |
| sd_age_span | 1.846672 |
| value | |
|---|---|
| avg_obs_per_individual | 3.803922 |
| median_obs_per_individual | 3.000000 |
| max_obs_per_individual | 8.000000 |
| min_obs_per_individual | 3.000000 |
| sd_obs_per_individual | 1.249313 |
Here we summarise for each individual the 1) age of first encounter, 2) the age of last encounter, 3) the average age encountered, 4) the standardized ages of encounter within each individual, and 5) the upper and lower 95% age estimates from BaSTA for these four measures.
# Summarise each individual's first and last ages and join them back on.
#
# For every ring this computes the first and last estimated age (minus 1, so
# ages index from 0), the conservative first/last ages, and the upper/lower
# 95% CI bounds of the first/last BaSTA age estimates, then merges these back
# onto the input data frame. For individuals first captured as juveniles
# (age_first_cap == "J") the conservative ages are also shifted down by 1.
age_summary <- function(df) {
  # one row of age summaries per individual
  per_individual_ages <-
    df %>%
    dplyr::select(ring, est_age, conservative_age,
                  est_age_lower, est_age_upper) %>%
    distinct() %>%
    group_by(ring) %>%
    summarise(firstage = min(est_age) - 1,
              conservative_firstage = min(conservative_age),
              firstage_lower = min(est_age_lower),
              firstage_upper = min(est_age_upper),
              lastage = max(est_age) - 1,
              conservative_lastage = max(conservative_age),
              lastage_lower = max(est_age_lower),
              lastage_upper = max(est_age_upper))
  # join the summaries back on and apply the same -1 shift to the current and
  # conservative ages of locally hatched (juvenile-first-capture) birds
  out <- left_join(df, per_individual_ages, by = "ring")
  out <- mutate(out,
                est_age = est_age - 1,
                conservative_age = ifelse(age_first_cap == "J",
                                          conservative_age - 1,
                                          conservative_age),
                conservative_firstage = ifelse(age_first_cap == "J",
                                               conservative_firstage - 1,
                                               conservative_firstage),
                conservative_lastage = ifelse(age_first_cap == "J",
                                              conservative_lastage - 1,
                                              conservative_lastage))
  out
}
# attach the per-individual age summaries and convert grouping variables to
# factors for the mixed models below (mod0 = intercept-only null model with
# nest ID nested within female ring as random intercepts)
eggdf <-
age_summary(df = eggdf) %>%
mutate(year = as.factor(year),
ID = as.factor(ID),
ring = as.factor(ring)) %>%
distinct()mod0 <- lmer(eggv ~ 1 +
(1|ring/ID),
data = eggdf)Using Dingemanse et al. (JAE, 2020; DOI: 10.1111/1365-2656.13122) method to control for selective appearance and disappearance by setting first and last age observed as fixed effects
# candidate egg-volume models: linear/quadratic age (with first/last observed
# age as fixed effects controlling selective appearance/disappearance),
# quadratic standardized lay date, and polyandry; all with nest nested in female
mod1 <- lmer(eggv ~ poly(jul_std_date, 2) +
(1|ring/ID),
data = eggdf)mod2 <- lmer(eggv ~ est_age + firstage + lastage +
poly(jul_std_date, 2) +
(1|ring/ID),
data = eggdf)mod3 <- lmer(eggv ~ poly(est_age, 2) + firstage + lastage +
poly(jul_std_date, 2) +
(1|ring/ID),
data = eggdf)mod4 <- lmer(eggv ~ poly(jul_std_date, 2) +
polyandry +
(1|ring/ID),
data = eggdf)mod5 <- lmer(eggv ~ est_age + firstage + lastage +
poly(jul_std_date, 2) +
polyandry +
(1|ring/ID),
data = eggdf)mod6 <- lmer(eggv ~ poly(est_age, 2) + firstage + lastage +
poly(jul_std_date, 2) +
polyandry +
(1|ring/ID),
data = eggdf)Calculate marginal and conditional R-squared statistics following Nakagawa and Schielzeth (Equ. 29 and 30 and Table 2; 2013).
# collect the seven candidate egg-volume models
model_list <-
list(mod0, mod1, mod2, mod3,
mod4, mod5, mod6)
# Nakagawa & Schielzeth R2 for each model, flattened to data frames
mR2_list <- lapply(model_list, function(x) r2_nakagawa(x))
mR2_list <- lapply(mR2_list, function(x) unlist(x))
mR2_list <- lapply(mR2_list, function(x) as.data.frame(x))
# NOTE(review): x$x[1]/x$x[2] rely on r2_nakagawa returning the conditional R2
# first and the marginal R2 second — confirm with the performance package docs
R2_estimates <-
data.frame(condR2 = c(unlist(lapply(mR2_list, function(x) x$x[1]))),
margR2 = c(unlist(lapply(mR2_list, function(x) x$x[2]))),
model = c("mod0", "mod1", "mod2", "mod3",
"mod4", "mod5", "mod6"))Table 2. Model selection by AICc.
| Predictors of egg volume | k | w | ΔAIC | R2conditional | R2marginal |
|---|---|---|---|---|---|
| Senescence + Quadratic lay date + Polyandry | 11 | 0.99 | 0.00 | 0.66 | 0.07 |
| Senescence + Quadratic lay date | 10 | 0.01 | 8.48 | 0.66 | 0.07 |
| Age + Season + Polyandry | 10 | 0.00 | 23.25 | 0.66 | 0.07 |
| Age + Quadratic lay date | 9 | 0.00 | 31.41 | 0.66 | 0.06 |
| Quadratic lay date + Polyandry | 7 | 0.00 | 48.59 | 0.65 | 0.02 |
| Quadratic lay date | 6 | 0.00 | 56.94 | 0.65 | 0.02 |
| Null | 4 | 0.00 | 93.62 | 0.65 | 0.00 |
Take a closer look at fixed effects of top model.
# check model components
# (effects-package partial-effect plots for each fixed term of the top model)
# quadratic senescence effect
plot(effect("poly(est_age, 2)", mod6))# selective appearance effect
plot(effect("firstage", mod6))# selective disappearance effect
plot(effect("lastage", mod6))# quadratic lay date effect
plot(effect("poly(jul_std_date, 2)", mod6))# polyandry effect
plot(effect("polyandry", mod6))# visualize residuals of top model
# residuals-vs-fitted scatter for the top model (Figure S2)
Fig_S2 <-
ggplot(augment(mod6), aes(x = .fitted, y = .resid)) +
geom_point(alpha = 0.5) +
geom_hline(yintercept = 0,
linetype = "dashed", color = "grey") +
ylab("Residual") +
xlab("Fitted value")ggpairs(mod6,
columns = c("est_age", "firstage", "lastage",
"jul_std_date", "polyandry"))# VIF function for lmer models (taken from: https://github.com/aufrank/R-hacks/blob/master/mer-utils.R)
# Variance inflation factors for the fixed effects of a fitted mixed model.
# Adapted from rms::vif (via aufrank/R-hacks mer-utils.R): VIFs are the
# diagonal of the inverse of the correlation matrix of the fixed-effect
# estimates, computed after dropping the leading intercept term(s).
vif.lme <- function (fit) {
  vc <- vcov(fit)
  coef_names <- names(fixef(fit))
  ## count (and drop) leading intercept terms before computing VIFs
  n_int <- sum(coef_names %in% c("Intercept", "(Intercept)"))
  if (n_int > 0) {
    vc <- vc[-seq_len(n_int), -seq_len(n_int), drop = FALSE]
    coef_names <- coef_names[-seq_len(n_int)]
  }
  est_sd <- sqrt(diag(vc))
  vifs <- diag(solve(vc / (est_sd %o% est_sd)))
  names(vifs) <- coef_names
  vifs
}
# compute VIFs for the top model's fixed effects
vif.lme(mod6) # all VIF statistics are < 2 so there is no evidence of collinearity## poly(est_age, 2)1 poly(est_age, 2)2 firstage
## 1.091534 1.019954 1.334666
## lastage poly(jul_std_date, 2)1 poly(jul_std_date, 2)2
## 1.352708 1.004238 1.012533
## polyandrypoly
## 1.035224
# Is there a difference of number of reproductive events between monogamous
# and polyandrous females?
# wrangle data
dbReadTable(Ceuta_OPEN, "BirdRef") %>%
dplyr::select(year, ID, male, female) %>%
arrange(female) %>%
group_by(female, year) %>%
summarise(n_mates = n_distinct(male, na.rm = TRUE),
n_nests = n_distinct(ID, na.rm = TRUE)) %>%
mutate(polyandry = ifelse(n_mates > 1, "poly", "mono"),
year = as.integer(year),
n_mates = ifelse(n_mates == 0, 1, n_mates),
multi_clutch = ifelse(n_nests > 1, 1, 0)) %>%
rename(ring = female) %>%
mutate(polyandry = as.factor(polyandry)) %>%
# within-subject mean and 95% CI of nests per year by mating tactic
Rmisc::summarySEwithin(.,
measurevar = "n_nests",
withinvars = "polyandry",
idvar = "ring",
conf.interval = 0.95) %>%
mutate(lcl = n_nests - ci,
ucl = n_nests + ci) %>%
# bar plot of mean nests per female-year with CI error bars
ggplot(.) +
geom_bar(aes(x = polyandry, y = n_nests, fill = polyandry),
colour = "grey40", stat = "identity", alpha = 0.4,
width = 0.5) +
geom_errorbar(width = 0.1, aes(x = polyandry, y = n_nests,
ymin = lcl, ymax = ucl)) +
ylab("Number of nests per female per year (mean ± 95% CI)") +
xlab("Mating tactics of a given female in a given year") +
scale_x_discrete(labels = c("mono" = "Monogamous",
"poly" = "Polyandrous")) +
scale_fill_manual(values = c("black", "#f03b20")) +
theme(legend.position = "none")# is lay date confounded by age (i.e., does lay date vary with age and thus complicate the senescence effect)?
# first extract the first nests of each individual for each year
# (earliest standardized lay date per female-year, after sorting)
nest1_IDs <-
nest_caps_F %>%
group_by(ring, year) %>%
arrange(ring, year, jul_std_date) %>%
distinct() %>%
slice(1)
# subset to remove outliers
eggdf_first_nests <-
eggdf %>%
filter(ID %in% nest1_IDs$ID) %>%
dplyr::select(est_age, year, ring, jul_std_date, firstage, lastage, polyandry) %>%
distinct()
# lay date of first nests as a function of quadratic (or linear) age,
# controlling for first/last age and polyandry, female as random intercept
laydate_age_quad <-
lmer(jul_std_date ~ poly(est_age, 2) + firstage + lastage + polyandry +
(1|ring),
data = eggdf_first_nests)
plot(effect("poly(est_age, 2)", laydate_age_quad))laydate_age_linear <-
lmer(jul_std_date ~ est_age + firstage + lastage + polyandry +
(1|ring),
data = eggdf_first_nests)
plot(effect("est_age", laydate_age_linear))# strong effect of polyandry on laydate of first nests:
# When an individual is polyandrous in a given year, their first nest will be
# much earlier than those that are monogamous in a given year. This makes sense.
plot(effect("polyandry", laydate_age_quad)) plot(est_age ~ jul_std_date, data = eggdf_first_nests) # no apparent relationshipcor(x = eggdf_first_nests$est_age, y = eggdf_first_nests$jul_std_date) # Pearson r = -0.011## [1] -0.01147054
# is polyandry confounded by lay date (i.e., does likelihood of polyandry vary with lay date and thus complicate the senescence effect)?
lay_date_polyandry_df <-
eggdf %>%
dplyr::select(ring, year, est_age, firstage, lastage, polyandry, jul_std_date) %>%
distinct() %>%
mutate(polyandry = as.factor(polyandry))
# binomial GLMMs of polyandry with female and year random intercepts:
# linear lay date effect
poly_lay_date_linear <-
glmer(polyandry ~ jul_std_date + firstage + lastage +
(1|ring) + (1|year), data = lay_date_polyandry_df, family = "binomial")
# quadratic lay date effect
poly_lay_date_quadratic <-
glmer(polyandry ~ poly(jul_std_date, 2) + firstage + lastage +
(1|ring) + (1|year), data = lay_date_polyandry_df, family = "binomial")
# null model (no lay date effect)
poly_lay_date_null <-
glmer(polyandry ~ firstage + lastage +
(1|ring) + (1|year), data = lay_date_polyandry_df, family = "binomial")
summary(poly_lay_date_linear)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: polyandry ~ jul_std_date + firstage + lastage + (1 | ring) +
## (1 | year)
## Data: lay_date_polyandry_df
##
## AIC BIC logLik deviance df.resid
## 314.5 336.1 -151.3 302.5 264
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3419 -0.4845 -0.2865 0.6471 2.7487
##
## Random effects:
## Groups Name Variance Std.Dev.
## ring (Intercept) 2.4520 1.5659
## year (Intercept) 0.5023 0.7087
## Number of obs: 270, groups: ring, 51; year, 13
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.06236 0.68733 -1.546 0.1222
## jul_std_date 0.06488 0.16128 0.402 0.6875
## firstage 1.25358 0.50255 2.494 0.0126 *
## lastage -0.21541 0.15596 -1.381 0.1672
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) jl_st_ firstg
## jul_std_dat 0.033
## firstage -0.036 0.020
## lastage -0.705 -0.032 -0.462
summary(poly_lay_date_quadratic)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: polyandry ~ poly(jul_std_date, 2) + firstage + lastage + (1 |
## ring) + (1 | year)
## Data: lay_date_polyandry_df
##
## AIC BIC logLik deviance df.resid
## 312.8 338.0 -149.4 298.8 263
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8732 -0.4849 -0.2723 0.6350 2.8135
##
## Random effects:
## Groups Name Variance Std.Dev.
## ring (Intercept) 2.2584 1.5028
## year (Intercept) 0.5411 0.7356
## Number of obs: 270, groups: ring, 51; year, 13
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.1387 0.6769 -1.682 0.0925 .
## poly(jul_std_date, 2)1 1.3146 2.6999 0.487 0.6263
## poly(jul_std_date, 2)2 5.5504 2.8523 1.946 0.0517 .
## firstage 1.2321 0.4912 2.508 0.0121 *
## lastage -0.1942 0.1521 -1.277 0.2017
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) p(__,2)1 p(__,2)2 firstg
## ply(j__,2)1 -0.018
## ply(j__,2)2 -0.099 0.052
## firstage -0.041 0.033 0.020
## lastage -0.700 -0.025 0.059 -0.457
# AICc model selection of the three polyandry~lay-date GLMMs, with Nakagawa
# & Schielzeth R2 values, rendered as a gt table
poly_lay_date_model_list <-
list(poly_lay_date_linear, poly_lay_date_quadratic, poly_lay_date_null)
poly_lay_date_R2_list <- lapply(poly_lay_date_model_list, function(x) r2_nakagawa(x))
poly_lay_date_R2_list <- lapply(poly_lay_date_R2_list, function(x) unlist(x))
poly_lay_date_R2_list <- lapply(poly_lay_date_R2_list, function(x) as.data.frame(x))
# NOTE(review): x$x[1]/x$x[2] rely on r2_nakagawa returning the conditional R2
# first and the marginal R2 second — confirm with the performance package docs
R2_estimates <-
data.frame(condR2 = c(unlist(lapply(poly_lay_date_R2_list, function(x) x$x[1]))),
margR2 = c(unlist(lapply(poly_lay_date_R2_list, function(x) x$x[2]))),
model = c("poly_lay_date_linear", "poly_lay_date_quadratic",
"poly_lay_date_null"))
# human-readable fixed-effect labels for the table
poly_lay_date_mod_names_fixef <-
data.frame(fixeffect = c("Null",
"Quadratic lay date effect",
"Linear lay date effect"),
model = c("poly_lay_date_null", "poly_lay_date_quadratic", "poly_lay_date_linear"))
AICc(poly_lay_date_linear, poly_lay_date_quadratic, poly_lay_date_null) %>%
mutate(model = row.names(.),
deltaAICc = AICc - min(AICc),
AICcWt = Weights(AICc)) %>%
arrange(AICc) %>%
mutate(ER = AICcWt[1]/AICcWt) %>%
collect() %>%
left_join(., poly_lay_date_mod_names_fixef, by = "model") %>%
left_join(., R2_estimates, by = "model") %>%
dplyr::select(fixeffect, df, AICcWt, deltaAICc, condR2, margR2) %>%
gt() %>%
cols_label(fixeffect = "Predictors of Polyandry",
df = md("***k***"),
AICcWt = md("***w***"),
deltaAICc = md("\U0394*AIC*"),
condR2 = md("*R<sup>2</sup><sub>conditional</sub>*"),
margR2 = md("*R<sup>2</sup><sub>marginal</sub>*")) %>%
fmt_number(columns = vars(deltaAICc, AICcWt,
condR2, margR2),
decimals = 2,
use_seps = FALSE) %>%
tab_options(column_labels.font.weight = "bold",
table.width = pct(80),
column_labels.font.size = 14,
table.font.size = 12,
data_row.padding = 5) %>%
cols_align(align = "left",
columns = vars(fixeffect))| Predictors of Polyandry | k | w | ΔAIC | R2conditional | R2marginal |
|---|---|---|---|---|---|
| Null | 5 | 0.44 | 0.00 | 0.53 | 0.11 |
| Quadratic lay date effect | 7 | 0.39 | 0.28 | 0.53 | 0.13 |
| Linear lay date effect | 6 | 0.17 | 1.93 | 0.53 | 0.12 |
# Visual check of the fitted linear lay-date effect on polyandry
plot(effect("jul_std_date", poly_lay_date_linear)) # no apparent relationship# is age confounded by polyandry (i.e., does age vary with likelihood to be polyandrous and thus complicate the senescence effect)?
# One row per female-year: collapse the egg-level data down to the
# variables needed for modelling polyandry as a function of age.
agedf <-
  eggdf %>%
  dplyr::select(ring, year, est_age, firstage, lastage,
                polyandry, jul_std_date) %>%
  distinct() %>%
  mutate(polyandry = as.factor(polyandry))
# Binomial GLMMs of polyandry with female ("ring") and year random
# intercepts: a linear age effect, a quadratic age effect, and a null
# model (no age term) for comparison.
poly_age_linear <-
  glmer(polyandry ~ est_age + firstage + lastage + (1 | ring) + (1 | year),
        data = agedf, family = "binomial")
poly_age_quadratic <-
  glmer(polyandry ~ poly(est_age, 2) + firstage + lastage +
          (1 | ring) + (1 | year),
        data = agedf, family = "binomial")
poly_age_null <-
  glmer(polyandry ~ firstage + lastage + (1 | ring) + (1 | year),
        data = agedf, family = "binomial")
summary(poly_age_linear)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: polyandry ~ est_age + firstage + lastage + (1 | ring) + (1 |
## year)
## Data: agedf
##
## AIC BIC logLik deviance df.resid
## 314.3 335.9 -151.2 302.3 264
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1640 -0.5083 -0.2842 0.6502 3.0418
##
## Random effects:
## Groups Name Variance Std.Dev.
## ring (Intercept) 2.4393 1.5618
## year (Intercept) 0.4347 0.6593
## Number of obs: 270, groups: ring, 51; year, 13
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.09935 0.68376 -1.608 0.1079
## est_age 0.08748 0.13876 0.630 0.5284
## firstage 1.15684 0.51483 2.247 0.0246 *
## lastage -0.24566 0.16482 -1.490 0.1361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) est_ag firstg
## est_age -0.061
## firstage -0.016 -0.267
## lastage -0.647 -0.334 -0.331
summary(poly_age_quadratic)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: polyandry ~ poly(est_age, 2) + firstage + lastage + (1 | ring) +
## (1 | year)
## Data: agedf
##
## AIC BIC logLik deviance df.resid
## 315.0 340.2 -150.5 301.0 263
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.2221 -0.4873 -0.2775 0.6302 2.8143
##
## Random effects:
## Groups Name Variance Std.Dev.
## ring (Intercept) 2.5029 1.5821
## year (Intercept) 0.3754 0.6127
## Number of obs: 270, groups: ring, 51; year, 13
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.9031 0.7440 -1.214 0.2248
## poly(est_age, 2)1 2.8664 4.7256 0.607 0.5441
## poly(est_age, 2)2 -4.0730 3.9374 -1.034 0.3009
## firstage 1.0781 0.5174 2.084 0.0372 *
## lastage -0.2267 0.1649 -1.375 0.1692
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) p(_,2)1 p(_,2)2 firstg
## ply(st_,2)1 0.385
## ply(st_,2)2 0.064 0.125
## firstage -0.134 -0.267 0.072
## lastage -0.746 -0.301 -0.089 -0.343
# Model selection: does age (linear or quadratic) predict polyandry?
# Same AICc/weights/R2 workflow as for the lay-date models above.
poly_age_model_list <-
list(poly_age_linear, poly_age_quadratic, poly_age_null)
poly_age_R2_list <- lapply(poly_age_model_list, function(x) r2_nakagawa(x))
poly_age_R2_list <- lapply(poly_age_R2_list, function(x) unlist(x))
poly_age_R2_list <- lapply(poly_age_R2_list, function(x) as.data.frame(x))
# x$x[1] = conditional R2, x$x[2] = marginal R2 (assumes r2_nakagawa lists
# the conditional estimate first -- confirm against the performance docs)
R2_estimates <-
data.frame(condR2 = c(unlist(lapply(poly_age_R2_list, function(x) x$x[1]))),
margR2 = c(unlist(lapply(poly_age_R2_list, function(x) x$x[2]))),
model = c("poly_age_linear", "poly_age_quadratic", "poly_age_null"))
poly_age_mod_names_fixef <-
data.frame(fixeffect = c("Null",
"Quadratic age effect",
"Linear age effect"),
model = c("poly_age_null", "poly_age_quadratic", "poly_age_linear"))
AICc(poly_age_linear, poly_age_quadratic, poly_age_null) %>%
mutate(model = row.names(.),
deltaAICc = AICc - min(AICc),
AICcWt = Weights(AICc)) %>%
arrange(AICc) %>%
mutate(ER = AICcWt[1]/AICcWt) %>%
collect() %>%
left_join(., poly_age_mod_names_fixef, by = "model") %>%
left_join(., R2_estimates, by = "model") %>%
dplyr::select(fixeffect, df, AICcWt, deltaAICc, condR2, margR2) %>%
gt() %>%
cols_label(fixeffect = "Predictors of Polyandry",
df = md("***k***"),
AICcWt = md("***w***"),
deltaAICc = md("\U0394*AIC*"),
condR2 = md("*R<sup>2</sup><sub>conditional</sub>*"),
margR2 = md("*R<sup>2</sup><sub>marginal</sub>*")) %>%
fmt_number(columns = vars(deltaAICc, AICcWt,
condR2, margR2),
decimals = 2,
use_seps = FALSE) %>%
tab_options(column_labels.font.weight = "bold",
table.width = pct(80),
column_labels.font.size = 14,
table.font.size = 12,
data_row.padding = 5) %>%
cols_align(align = "left",
columns = vars(fixeffect))| Predictors of Polyandry | k | w | ΔAIC | R2conditional | R2marginal |
|---|---|---|---|---|---|
| Null | 5 | 0.58 | 0.00 | 0.53 | 0.11 |
| Linear age effect | 6 | 0.25 | 1.71 | 0.53 | 0.11 |
| Quadratic age effect | 7 | 0.17 | 2.52 | 0.53 | 0.12 |
plot(effect("est_age", poly_age_linear)) # no apparent relationshipxtabs(~polyandry + est_age, data = agedf) # no apparent relationship## est_age
## polyandry 0 1 2 3 4 5 6 7 8 9 12
## mono 26 42 39 28 12 11 5 4 2 1 1
## poly 8 20 21 14 17 10 6 3 0 0 0
# no interaction between polyandry and age on eggv
# (fit the polyandry x quadratic-age model and compare to mod6 by AIC)
mod_poly_x_age <- lmer(eggv ~ polyandry * poly(est_age, 2) +
firstage + lastage + poly(jul_std_date, 2) +
(1|ring/ID), data = eggdf)
plot(effect("polyandry * poly(est_age, 2)", mod_poly_x_age))AIC(mod_poly_x_age, mod6) # vastly worse fit...more evidence of no relationshipHere we retrieve mean and credible intervals for fixed effects, random effects, and residual variance of mod. Then we create separate table for each model component. This essentially reproduces the model estimate table in Table 1 of Dingemanse et al. (JAE, 2020; DOI: 10.1111/1365-2656.13122) (code within this chunk is adapted from code provided by Niels Dingemanse on March 6, 2020)
First simulate posteriors of the model parameters. Note that we put a constraint on the quadratic terms, such that only simulated model estimates producing a peak >= 1 and <= 10 are accepted (matching the constraint coded below). Our rationale for doing this is that these are the ages represented in our dataset and are thus biologically meaningful.
# set seed to make simulation reproducible
set.seed(14)
# specify the number of simulations you would like to produce
n_sim = 5000
# respecify model with quadratic components broken into the linear and polynomial terms
mod6 <- lmer(eggv ~ est_age + I(est_age^2) + firstage + lastage +
jul_std_date + I(jul_std_date^2) + polyandry +
(1|ring/ID) + (1|year),
data = eggdf)
# create a sim object containing all the components of mod6
mod6_sim <- sim(mod6, n.sim = n_sim)
# Simulate posterior distribution of model estimates. Only accept simulated
# model estimates which produce a peak >= 1 and <= 10 (these are the ages
# of our dataset and are thus biologically meaningful)
for (i in 1:n_sim) {
# set peak age to 0 at start of each loop
bsim_peak_age <- 0
# set attempt to 0 at start of each loop
attempt <- 0
# store simulated estimates only if peak >= 1 and <= 10 and it's less than
# 100 attempts
while( (bsim_peak_age < 1 | bsim_peak_age > 10) && attempt <= 100 ) {
# next attempt
attempt <- attempt + 1
# simulate an estimate
try(
mod6_sim_try <- sim(mod6, n.sim = 1)
)
# store calculated peak (i.e., the apex of the polynomial curve)
# vertex of the quadratic: -b / (2a), where columns 2 and 3 of the fixef
# draw are the linear (est_age) and quadratic (I(est_age^2)) terms
try(
bsim_peak_age <- (-(mod6_sim_try@fixef[, 2]) / (2 * mod6_sim_try@fixef[, 3]))
)
}
# store fixed effects
mod6_sim@fixef[i, ] <- mod6_sim_try@fixef
# store random effect for nest (only 3 simulated values needed as there
# are only 3 eggs per nest)
# NOTE(review): only iterations 1-3 overwrite the nest-level draws, while
# iterations >= 4 overwrite ring/year/sigma -- each component therefore
# keeps the initial unconstrained draws for the other iterations; confirm
# this is intended.
if(i < 4){
mod6_sim@ranef$`ID:ring`[i, , ] <- mod6_sim_try@ranef$`ID:ring`
}
else{
# store other random effects
mod6_sim@ranef$ring[i, , ] <- mod6_sim_try@ranef$ring
mod6_sim@ranef$year[i, , ] <- mod6_sim_try@ranef$year
# store residual
mod6_sim@sigma[i] <- mod6_sim_try@sigma
}
}Retrieve all random effect estimates (mean, credible intervals)
# specify column names of the sim object as the names of the model components
colnames(mod6_sim@fixef) <-
names(fixef(mod6))
# Retrieve all random effect estimates (mean, credible intervals)
# simultaneously
# For each random-effect component the variance across levels is computed
# within each posterior draw, then summarised over draws (mean + 95% CI).
coef_ranef_mod6 <-
lapply(ranef(mod6_sim), function(x) c(mean(apply(x[, , 1], 1, var)),
quantile(apply(x[, , 1], 1, var),
prob = c(0.025, 0.975))))
# Transpose the random effects table (one row per component; the unnamed
# mean column becomes "V1", which downstream code renames)
ranefTable <-
as.data.frame(t(as.data.frame(coef_ranef_mod6))) %>%
rownames_to_column("coefName")Retrieve all fixed effect estimates (mean, credible intervals)
# Retrieve all fixed effect estimates (mean, credible intervals)
# simultaneously
coef_fixef_mod6 <-
rbind(apply(mod6_sim@fixef, 2, mean),
apply(mod6_sim@fixef, 2, quantile,
prob = c(0.025, 0.975)))
# Transpose the fixed effects table: one row per coefficient with columns
# V1 (posterior mean), 2.5% and 97.5% (credible interval bounds)
fixefTable <-
as.data.frame(t(as.data.frame(coef_fixef_mod6))) %>%
rownames_to_column("coefName")Calculate adjusted repeatabilities
# Retrieve residual variance estimate (mean, credible intervals)
# (sigma is drawn per simulation; square it to get the residual variance)
coef_res_mod6 <-
c(mean(mod6_sim@sigma^2), quantile(mod6_sim@sigma^2, c(0.025, 0.975)))
# Transpose the residual effects table
resTable <-
as.data.frame(t(as.data.frame(coef_res_mod6))) %>%
mutate(coefName = "residual")
# Calculate total phenotypic variance not explained by fixed effects
# (per-draw variances of each random-effect component plus the residual)
ranefAndResidualAseggdf <-
cbind(as.data.frame(lapply(ranef(mod6_sim),
function(x) apply(x[, , 1], 1, var))),
residual = mod6_sim@sigma^2)
ranefAndResidualAseggdf$varTotal <-
rowSums(ranefAndResidualAseggdf)
# Express each variance component (nest, ring, year, residual) as a
# proportion of the total phenotypic variance not explained by fixed
# effects, i.e. the per-draw repeatability of each component.
# NOTE: mutate_at() + funs() is deprecated in dplyr; across() is the
# modern equivalent and no longer emits the deprecation warning that the
# previous trailing suppressWarnings() call was masking.
rpt_each <-
ranefAndResidualAseggdf %>%
mutate(across(-varTotal, ~ .x / varTotal))
# Summarise each repeatability over draws (mean + 95% CI) and drop the
# varTotal helper column from the final table
coef_rpt_all <-
apply(rpt_each, 2,
function(x) c(mean (x), quantile (x, prob = c(0.025, 0.975))))
coefRptTable <-
as.data.frame(t(coef_rpt_all)) %>%
rownames_to_column("coefName") %>%
filter(!coefName %in% c("varTotal"))Retrieve peak performance estimates
# Calculate the traits' value at peak performance and store in table
# (vertex height of the quadratic: intercept - b^2 / (4a))
ypeak_mod <-
mod6_sim@fixef[, 1] - ((mod6_sim@fixef[, 2])^2 / (4 * mod6_sim@fixef[, 3]))
coef_ypeak_mod <-
c(mean(ypeak_mod), quantile(ypeak_mod, c(0.025,0.975)))
coefYpeakTable <-
as.data.frame(cbind(coefName = "Egg volume at peak",
as.data.frame(t(as.data.frame(coef_ypeak_mod)))))
# Calculate the age at peak performance and store in table
# (vertex location of the quadratic: -b / (2a))
xpeak_mod <-
-(mod6_sim@fixef[, 2]) / (2 * mod6_sim@fixef[, 3])
coef_xpeak_mod <-
c(mean(xpeak_mod), quantile(xpeak_mod, c(0.025, 0.975)))
coefXpeakTable <-
as.data.frame(cbind(coefName = "Age at peak",
as.data.frame(t(as.data.frame(coef_xpeak_mod)))))# Calculate the difference between age 0 and 1 expected from (age+age^2) and
# Min_Age (if mirrored is their sum).
# NOTE(review): coefdifAgeMinTable is not included in the compiled results
# table below -- confirm whether it is still needed.
difAgeMin_mod <-
mod6_sim@fixef[, 2] + mod6_sim@fixef[, 3] + mod6_sim@fixef[, 4]
coef_difAgeMin_mod <-
c(mean(difAgeMin_mod), quantile(difAgeMin_mod, c(0.025, 0.975)))
coefdifAgeMinTable <-
as.data.frame(cbind(coefName = "DifAgeMin",
as.data.frame(t(as.data.frame(coef_difAgeMin_mod)))))# create data that contains all factor levels and covariate means to use for
# calculating predictions
# (seq(0:max(est_age)) - 1 is equivalent to the sequence 0:max(est_age))
new_data <- expand.grid(est_age = seq(0:max(eggdf$est_age)) - 1,
PrePeak_mod = c("1","0"),
firstage = mean(eggdf$firstage),
lastage = mean(eggdf$lastage),
jul_std_date = mean(eggdf$jul_std_date),
polyandry = c("mono", "poly"))
# create an empty list to store for-loop output
bs.predictions <- list(
# peak values from previous simulation
peaks = xpeak_mod,
# age-specific predictions
predictions = matrix(ncol = n_sim, nrow = nrow(new_data)),
# slope of pre-peak effect
PrePeak_age_effect = matrix(ncol = n_sim, nrow = 1),
# slope of post-peak effect
PostPeak_age_effect = matrix(ncol = n_sim, nrow = 1))
# for-loop to run peak analysis on all simulated posteriors above
# For each posterior draw i, observations are split at the drawn peak age
# (PrePeak_mod = "0" at or before the peak, "1" after) and a piecewise
# linear model is refitted; predictions and slopes are stored per draw.
for (i in 1:n_sim) {
# start loop with mod6peak as NULL and attempt as 0
mod6peak <- NULL
attempt <- 0
# store model output and predictions only if mod6peak converged (i.e., is not
# NULL) and it's less than 100 attempts
while(is.null(mod6peak) && attempt <= 100) {
# next attempt
attempt <- attempt + 1
# set peak based on the mean estimate from the previous simulation
# (ages <= ceiling(peak) get "0"; ages > ceiling(peak) get "1")
eggdf$PrePeak_mod[eggdf$est_age < (ceiling(bs.predictions$peaks[i]) + 1)] <- "0"
eggdf$PrePeak_mod[eggdf$est_age > ceiling(bs.predictions$peaks[i])] <- "1"
try(
# run peak analysis (i.e., now the quadratic effect is broken up into two
# pieces reflective of the pre- and post-peak sections of the curve)
mod6peak <-
lmer(formula = eggv ~ PrePeak_mod + est_age * PrePeak_mod +
firstage + lastage + as.numeric(jul_std_date) +
I(as.numeric(jul_std_date)^2) + polyandry +
(1|ring/ID) + (1|year),
data = eggdf)
)
try(
# calculate predictions from model
bs.predictions$predictions[, i] <-
predict(mod6peak, new_data, re.form = NA)
)
}
# calculate the slope of the pre-peak age effect (i.e., because pre-peak is
# set as the baseline level of the factor, this is simply the baseline age
# slope)
bs.predictions$PrePeak_age_effect[i] <-
ifelse("PrePeak_mod1:est_age" %in%
row.names(summary(mod6peak)$coefficients),
summary(mod6peak)$coefficients["est_age","Estimate"],
NA)
# calculate the slope of the post-peak age effect (i.e., because pre-peak is
# set as the baseline level of the factor, this is calculated as the baseline
# age slope plus the interaction slope)
bs.predictions$PostPeak_age_effect[i] <-
ifelse("PrePeak_mod1:est_age" %in%
row.names(summary(mod6peak)$coefficients),
summary(mod6peak)$coefficients["est_age","Estimate"] +
summary(mod6peak)$coefficients["PrePeak_mod1:est_age","Estimate"],
NA)
}Retrieve pre- and post-peak age effects
# Summarise the simulated pre- and post-peak slopes (mean + 95% CI);
# NAs arise from draws where the interaction term was not estimable.
# Retrieve pre-peak age estimate (mean, credible intervals)
coefPrePeakAgeTable <-
data.frame(coefName = "Pre-peak age effect",
mean_estimate = mean(bs.predictions$PrePeak_age_effect,
na.rm = TRUE),
lower95 = quantile(bs.predictions$PrePeak_age_effect,
prob = c(0.025), na.rm = TRUE),
upper95 = quantile(bs.predictions$PrePeak_age_effect,
prob = c(0.975), na.rm = TRUE),
row.names = 1)
# Retrieve post-peak age estimate (mean, credible intervals)
coefPostPeakAgeTable <-
data.frame(coefName = "Post-peak age effect",
mean_estimate = mean(bs.predictions$PostPeak_age_effect,
na.rm = TRUE),
lower95 = quantile(bs.predictions$PostPeak_age_effect,
prob = c(0.025), na.rm = TRUE),
upper95 = quantile(bs.predictions$PostPeak_age_effect,
prob = c(0.975), na.rm = TRUE),
row.names = 1)Compile all results into one table
# Retrieve sample sizes
sample_sizes <-
eggdf %>%
summarise(Year = n_distinct(year),
Individual = n_distinct(ring),
Nests = n_distinct(ID),
Observations = nrow(eggdf))
sample_sizes <-
as.data.frame(t(as.data.frame(sample_sizes))) %>%
rownames_to_column("coefName") %>%
rename(mean_estimate = V1)
# clean model component names
# NOTE: these labels are attached purely by position via bind_cols() below,
# so this vector must stay in sync with the row order of the bound tables.
mod_comp_names <-
data.frame(comp_name = c("Intercept",
"Linear age",
"Quadratic age",
"First age",
"Last age",
"Linear lay date",
"Quadratic lay date",
"Polyandry",
"Nest / Individual",
"Individual",
"Year",
"Residual",
"Nest / Individual",
"Individual",
"Year",
"Residual",
"Egg volume at peak",
"Age at peak",
"Pre-peak age effect",
"Post-peak age effect",
"Years",
"Individuals",
"Nests",
"Observations (i.e., Eggs)"))
# Store all parameters into a single table and clean it up
allCoefs_mod <-
bind_rows(fixefTable,
ranefTable,
resTable,
coefRptTable,
coefYpeakTable,
coefXpeakTable) %>%
rename(lower95 = `2.5%`,
upper95 = `97.5%`,
mean_estimate = V1) %>%
bind_rows(.,
coefPrePeakAgeTable,
coefPostPeakAgeTable,
sample_sizes) %>%
bind_cols(.,
mod_comp_names) %>%
# back-transform the age at peak estimate to the correct age at first reproduction
mutate(mean_estimate = ifelse(coefName == "Age at peak", mean_estimate + 1, mean_estimate),
lower95 = ifelse(coefName == "Age at peak", lower95 + 1, lower95),
upper95 = ifelse(coefName == "Age at peak", upper95 + 1, upper95)) %>%
mutate(coefString = ifelse(!is.na(lower95),
paste0("[",
round(lower95, 2), ", ",
round(upper95, 2), "]"),
NA),
effect = c(rep("Fixed effects \U1D6FD", nrow(fixefTable)),
rep("Random effects \U1D70E\U00B2", nrow(ranefTable)),
rep("Random effects \U1D70E\U00B2", nrow(resTable)),
rep("Adjusted Repeatability \U1D45F", nrow(coefRptTable)),
rep("Peak Performance \U1D6FD", nrow(coefYpeakTable)),
rep("Peak Performance \U1D6FD", nrow(coefXpeakTable)),
rep("Pre-/post-peak analysis \U1D6FD", nrow(coefPrePeakAgeTable)),
rep("Pre-/post-peak analysis \U1D6FD", nrow(coefPostPeakAgeTable)),
rep("Sample sizes \U1D45B", nrow(sample_sizes)))) %>%
dplyr::select(effect, everything())
# re-organize model components for table
# (swaps the Nest/Individual rows within the random-effect and
# repeatability sections so the display order matches the manuscript)
allCoefs_mod <-
allCoefs_mod[c(c(1:8), 10, 9, 11, 12, 14, 13, 15, 16, c(17:24)), ]Wrangle predicted values for each iteration of the simulation proceedure (for visual purposes)
# expand new data across the 5000 simulations for binding to predictions
new_data_expanded <-
data.frame(new_data)[rep(seq_len(nrow(data.frame(new_data))), n_sim), ]
# wrangle predicted values of simulation for plotting
# NOTE(review): data.frame(value = melt(...)) produces a column named
# "value.value" when melt() itself returns a data frame; confirm that the
# rename(value.x, value.y) below matches the column names produced by the
# installed reshape2/dplyr versions.
predicted_values <-
# melt the predictions and peaks of the simulation and merge each dataframe
left_join(melt(bs.predictions$predictions),
data.frame(value = melt(bs.predictions$peaks),
Var2 = c(1:n_sim)),
by = "Var2") %>%
# add the new_data levels to each iteration's vector of predictions
bind_cols(., new_data_expanded) %>%
# rename columns
rename(iteration = Var2,
eggv = value.x,
peak_age = value.y) %>%
# remove unneeded columns
dplyr::select(-Var1) %>%
# filter dataframe so that each iteration only contains pre- and post-peak
# predictions that are within the peak
filter((PrePeak_mod == "0" & est_age < (ceiling(peak_age) + 1)) |
(PrePeak_mod == "1" & est_age > ceiling(peak_age)))Store model output
results_mod6 <- list(mod6_posteriors = mod6_sim,
mod6_peak_analysis_predictions = predicted_values,
mod6_summary_table = allCoefs_mod)Produce table of effect sizes in a similar layout to that shown in Table 1 of Dingemanse et al. JAE 2020 (DOI: 10.1111/1365-2656.13122).
Run the model with “poly(est_age, 2, raw = TRUE)” instead of “est_age + I(est_age^2)” to make plotting the trend line easier
# Refit mod6 using raw polynomial terms so effect() can generate fitted
# curves over a grid directly (same model, different parameterisation)
mod6a <-
lmer(eggv ~ poly(est_age, 2, raw = TRUE) + firstage + lastage +
poly(jul_std_date, 2, raw = TRUE) + polyandry +
(1|ring/ID) + (1|year),
data = eggdf)
# extract the fitted values of the polynomial age effect
mod6a_age_fits <-
as.data.frame(effect("poly(est_age, 2, raw = TRUE)", mod6a,
xlevels = list(est_age = seq(min(eggdf$est_age),
max(eggdf$est_age), 1))))
# extract the fitted values of the polynomial season effect
mod6a_season_fits <-
as.data.frame(effect("poly(jul_std_date, 2, raw = TRUE)", mod6a,
xlevels = list(jul_std_date = seq(min(eggdf$jul_std_date),
max(eggdf$jul_std_date), 0.1))))Here we show the overall results of the analysis. The black line shows the overall quadratic trend of egg volume with age while controlling for selective appearence and dissappearence. The orange lines show the predicted pre-peak trends for each run of the simulation, while the green lines show the same for the post-peak trend. The points in the plot visualise the raw data, with each point being a single egg. The density plot on the top of the figure illustrates the spread of the ‘age at peak performance’ captured by our simulation proceedure. The vertical dashed line represents the median age of this posterior distribution.
Here we show the overall results of the analysis. The black line shows the overall quadratic trend of egg volume with age while controlling for selective appearance and disappearance. The orange lines show the predicted pre-peak trends for each run of the simulation, while the green lines show the same for the post-peak trend. The points in the plot visualise the raw data, with each point being a single egg. The density plot on the top of the figure illustrates the spread of the ‘age at peak performance’ captured by our simulation procedure. The vertical dashed line represents the median age of this posterior distribution.
Here we visualize the effect sizes for all parameters estimated in our model (95% credible intervals are shown around each mean estimate).
Here we rerun the whole simulation procedure on a dataset that removes outlying data (i.e., very old observations from two females). See http://r-statistics.co/Outlier-Treatment-With-R.html
# Outlier screening: drop the oldest observations (est_age >= 9) and
# inspect age boxplots and Cook's distances for influential egg records.
eggdf_out_rm <-
eggdf %>%
filter(est_age < 9)ages <-
eggdf_out_rm %>%
dplyr::select(ring, ID, est_age) %>%
distinct()
boxplot(ages$est_age, boxwex = 0.1)
boxplot.stats(ages$est_age)$out
boxplot(eggdf$est_age, boxwex = 0.1)
boxplot.stats(eggdf$est_age)$out
boxplot(eggdf$eggv ~ eggdf$est_age, boxwex = 0.4)
cooksd <- cooks.distance(mod6)
plot(cooksd, pch="*", cex=2, main="Influential Obs by Cooks distance") # plot cook's distance
abline(h = 4*mean(cooksd, na.rm=T), col="red") # add cutoff line
# NOTE(review): 1:length(cooksd)+1 parses as (1:length(cooksd)) + 1, so the
# labels are drawn one position to the right -- confirm the offset is intended
text(x=1:length(cooksd)+1, y=cooksd, labels=ifelse(cooksd>4*mean(cooksd, na.rm=T),names(cooksd),""), col="red") # add labels
influential <- as.numeric(names(cooksd)[(cooksd > 4*mean(cooksd, na.rm=T))]) # influential row numbers
head(eggdf[influential, ])
length(influential)# set seed to make simulation reproducible
set.seed(14)
# specify the number of simulations you would like to produce
n_sim = 5000
# respecify model with quadratic components broken into the linear and polynomial terms
mod6_out_rm <-
lmer(eggv ~ est_age + I(est_age^2) + firstage + lastage +
jul_std_date + I(jul_std_date^2) + polyandry +
(1|ring/ID) + (1|year),
data = eggdf_out_rm)
# create a sim object containing all the components of mod6_out_rm
mod6_out_rm_sim <- sim(mod6_out_rm, n.sim = n_sim)
# Simulate posterior distribution of model estimates. Only accept simulated
# model estimates which produce a peak >= 1 and <= 7
for (i in 1:n_sim) {
# set peak age to 0 at start of each loop
bsim_peak_age <- 0
# set attempt to 0 at start of each loop
attempt <- 0
# store simulated estimates only if peak >= 1 and <= 7 and it's less than
# 100 attempts
while( (bsim_peak_age < 1 | bsim_peak_age > 7) && attempt <= 100 ) {
# next attempt
attempt <- attempt + 1
# simulate an estimate
try(
mod6_out_rm_sim_try <- sim(mod6_out_rm, n.sim = 1)
)
# store calculated peak (i.e., the apex of the polynomial curve: -b / (2a))
try(
bsim_peak_age <- (-(mod6_out_rm_sim_try@fixef[, 2]) / (2 * mod6_out_rm_sim_try@fixef[, 3]))
)
}
# store fixed effects
mod6_out_rm_sim@fixef[i, ] <- mod6_out_rm_sim_try@fixef
# store random effect for nest (only 3 simulated values needed as there
# are only 3 eggs per nest)
# NOTE(review): as in the full-data simulation above, only iterations 1-3
# overwrite the nest-level draws and iterations >= 4 overwrite
# ring/year/sigma -- confirm this is intended.
if(i < 4){
mod6_out_rm_sim@ranef$`ID:ring`[i, , ] <- mod6_out_rm_sim_try@ranef$`ID:ring`
}
else{
# store other random effects
mod6_out_rm_sim@ranef$ring[i, , ] <- mod6_out_rm_sim_try@ranef$ring
mod6_out_rm_sim@ranef$year[i, , ] <- mod6_out_rm_sim_try@ranef$year
# store residual
mod6_out_rm_sim@sigma[i] <- mod6_out_rm_sim_try@sigma
}
}Retrieve all random effect estimates (mean, credible intervals)
# Outlier-removed analysis: same posterior summaries as for mod6 above.
# Note that ranefTable/fixefTable are reused (overwritten) names.
# specify column names of the sim object as the names of the model components
colnames(mod6_out_rm_sim@fixef) <-
names(fixef(mod6_out_rm))
# Retrieve all random effect estimates (mean, credible intervals)
# simultaneously
coef_ranef_mod6_out_rm <-
lapply(ranef(mod6_out_rm_sim), function(x) c(mean(apply(x[, , 1], 1, var)),
quantile(apply(x[, , 1], 1, var),
prob = c(0.025, 0.975))))
# Transpose the random effects table
ranefTable <-
as.data.frame(t(as.data.frame(coef_ranef_mod6_out_rm))) %>%
rownames_to_column("coefName")Retrieve all fixed effect estimates (mean, credible intervals)
# Retrieve all fixed effect estimates (mean, credible intervals)
# simultaneously
coef_fixef_mod6_out_rm <-
rbind(apply(mod6_out_rm_sim@fixef, 2, mean),
apply(mod6_out_rm_sim@fixef, 2, quantile,
prob = c(0.025, 0.975)))
# Transpose the fixed effects table
fixefTable <-
as.data.frame(t(as.data.frame(coef_fixef_mod6_out_rm))) %>%
rownames_to_column("coefName")Calculate adjusted repeatabilities
# Retrieve residual variance estimate (mean, credible intervals)
coef_res_mod6_out_rm <-
c(mean(mod6_out_rm_sim@sigma^2), quantile(mod6_out_rm_sim@sigma^2, c(0.025, 0.975)))
# Transpose the residual effects table
resTable <-
as.data.frame(t(as.data.frame(coef_res_mod6_out_rm))) %>%
mutate(coefName = "residual")
# Calculate total phenotypic variance not explained by fixed effects
# (per-draw variances of each random-effect component plus the residual)
ranefAndResidualAseggdf <-
cbind(as.data.frame(lapply(ranef(mod6_out_rm_sim),
function(x) apply(x[, , 1], 1, var))),
residual = mod6_out_rm_sim@sigma^2)
ranefAndResidualAseggdf$varTotal <-
rowSums(ranefAndResidualAseggdf)
# Express each variance component (nest, ring, year, residual) as a
# proportion of the total phenotypic variance not explained by fixed
# effects, i.e. the per-draw repeatability of each component.
# NOTE: mutate_at() + funs() is deprecated in dplyr; across() is the
# modern equivalent and no longer emits the deprecation warning that the
# previous trailing suppressWarnings() call was masking.
rpt_each <-
ranefAndResidualAseggdf %>%
mutate(across(-varTotal, ~ .x / varTotal))
# Summarise each repeatability over draws (mean + 95% CI) and drop the
# varTotal helper column from the final table
coef_rpt_all <-
apply(rpt_each, 2,
function(x) c(mean (x), quantile (x, prob = c(0.025, 0.975))))
coefRptTable <-
as.data.frame(t(coef_rpt_all)) %>%
rownames_to_column("coefName") %>%
filter(!coefName %in% c("varTotal"))Retrieve peak performance estimates
# Calculate the traits' value at peak performance and store in table
# (vertex height of the quadratic: intercept - b^2 / (4a))
ypeak_mod <-
mod6_out_rm_sim@fixef[, 1] - ((mod6_out_rm_sim@fixef[, 2])^2 / (4 * mod6_out_rm_sim@fixef[, 3]))
coef_ypeak_mod <-
c(mean(ypeak_mod), quantile(ypeak_mod, c(0.025,0.975)))
coefYpeakTable <-
as.data.frame(cbind(coefName = "Egg volume at peak",
as.data.frame(t(as.data.frame(coef_ypeak_mod)))))
# Calculate the age at peak performance and store in table
# (vertex location of the quadratic: -b / (2a))
xpeak_mod <-
-(mod6_out_rm_sim@fixef[, 2]) / (2 * mod6_out_rm_sim@fixef[, 3])
coef_xpeak_mod <-
c(mean(xpeak_mod), quantile(xpeak_mod, c(0.025, 0.975)))
coefXpeakTable <-
as.data.frame(cbind(coefName = "Age at peak",
as.data.frame(t(as.data.frame(coef_xpeak_mod)))))# Calculate the difference between age 0 and 1 expected from (age+age^2) and
# Min_Age (if mirrored is their sum).
# NOTE(review): coefdifAgeMinTable is not included in the compiled results
# table below -- confirm whether it is still needed.
difAgeMin_mod <-
mod6_out_rm_sim@fixef[, 2] + mod6_out_rm_sim@fixef[, 3] + mod6_out_rm_sim@fixef[, 4]
coef_difAgeMin_mod <-
c(mean(difAgeMin_mod), quantile(difAgeMin_mod, c(0.025, 0.975)))
coefdifAgeMinTable <-
as.data.frame(cbind(coefName = "DifAgeMin",
as.data.frame(t(as.data.frame(coef_difAgeMin_mod)))))# create data that contains all factor levels and covariate means to use for
# calculating predictions
# (seq(0:max(est_age)) - 1 is equivalent to the sequence 0:max(est_age))
new_data_out_rm <- expand.grid(est_age = seq(0:max(eggdf_out_rm$est_age)) - 1,
PrePeak_mod = c("1","0"),
firstage = mean(eggdf_out_rm$firstage),
lastage = mean(eggdf_out_rm$lastage),
jul_std_date = mean(eggdf_out_rm$jul_std_date),
polyandry = c("mono", "poly"))
# create an empty list to store for-loop output
bs.predictions_out_rm <- list(
# peak values from previous simulation
peaks = xpeak_mod,
# age-specific predictions
predictions = matrix(ncol = n_sim, nrow = nrow(new_data_out_rm)),
# slope of pre-peak effect
PrePeak_age_effect = matrix(ncol = n_sim, nrow = 1),
# slope of post-peak effect
PostPeak_age_effect = matrix(ncol = n_sim, nrow = 1))
# for-loop to run peak analysis on all simulated posteriors above
# For each posterior draw i, observations are split at the drawn peak age
# (PrePeak_mod = "0" at or before the peak, "1" after) and a piecewise
# linear model is refitted; predictions and slopes are stored per draw.
for (i in 1:n_sim) {
# start loop with mod6_out_rmpeak as NULL and attempt as 0
mod6_out_rmpeak <- NULL
attempt <- 0
# store model output and predictions only if mod6_out_rmpeak converged (i.e., is not
# NULL) and it's less than 100 attempts
while(is.null(mod6_out_rmpeak) && attempt <= 100) {
# next attempt
attempt <- attempt + 1
# set peak based on the mean estimate from the previous simulation
eggdf_out_rm$PrePeak_mod[eggdf_out_rm$est_age < (ceiling(bs.predictions_out_rm$peaks[i]) + 1)] <- "0"
eggdf_out_rm$PrePeak_mod[eggdf_out_rm$est_age > ceiling(bs.predictions_out_rm$peaks[i])] <- "1"
try(
# run peak analysis (i.e., now the quadratic effect is broken up into two
# pieces reflective of the pre- and post-peak sections of the curve)
mod6_out_rmpeak <-
lmer(formula = eggv ~ PrePeak_mod + est_age * PrePeak_mod +
firstage + lastage + as.numeric(jul_std_date) +
I(as.numeric(jul_std_date)^2) + polyandry +
(1|ring/ID) + (1|year),
data = eggdf_out_rm)
)
try(
# calculate predictions from model
bs.predictions_out_rm$predictions[, i] <-
predict(mod6_out_rmpeak, new_data_out_rm, re.form = NA)
)
}
# calculate the slope of the pre-peak age effect (i.e., because pre-peak is
# set as the baseline level of the factor, this is simply the baseline age
# slope)
bs.predictions_out_rm$PrePeak_age_effect[i] <-
ifelse("PrePeak_mod1:est_age" %in%
row.names(summary(mod6_out_rmpeak)$coefficients),
summary(mod6_out_rmpeak)$coefficients["est_age","Estimate"],
NA)
# calculate the slope of the post-peak age effect (i.e., because pre-peak is
# set as the baseline level of the factor, this is calculated as the baseline
# age slope plus the interaction slope)
bs.predictions_out_rm$PostPeak_age_effect[i] <-
ifelse("PrePeak_mod1:est_age" %in%
row.names(summary(mod6_out_rmpeak)$coefficients),
summary(mod6_out_rmpeak)$coefficients["est_age","Estimate"] +
summary(mod6_out_rmpeak)$coefficients["PrePeak_mod1:est_age","Estimate"],
NA)
}Retrieve pre- and post-peak age effects
# Summarise the simulated pre- and post-peak slopes (mean + 95% CI);
# NAs arise from draws where the interaction term was not estimable.
# Retrieve pre-peak age estimate (mean, credible intervals)
coefPrePeakAgeTable <-
data.frame(coefName = "Pre-peak age effect",
mean_estimate = mean(bs.predictions_out_rm$PrePeak_age_effect,
na.rm = TRUE),
lower95 = quantile(bs.predictions_out_rm$PrePeak_age_effect,
prob = c(0.025), na.rm = TRUE),
upper95 = quantile(bs.predictions_out_rm$PrePeak_age_effect,
prob = c(0.975), na.rm = TRUE),
row.names = 1)
# Retrieve post-peak age estimate (mean, credible intervals)
coefPostPeakAgeTable <-
data.frame(coefName = "Post-peak age effect",
mean_estimate = mean(bs.predictions_out_rm$PostPeak_age_effect,
na.rm = TRUE),
lower95 = quantile(bs.predictions_out_rm$PostPeak_age_effect,
prob = c(0.025), na.rm = TRUE),
upper95 = quantile(bs.predictions_out_rm$PostPeak_age_effect,
prob = c(0.975), na.rm = TRUE),
row.names = 1)Compile all results into one table
# Retrieve sample sizes
sample_sizes <-
eggdf_out_rm %>%
summarise(Year = n_distinct(year),
Individual = n_distinct(ring),
Nests = n_distinct(ID),
Observations = nrow(eggdf_out_rm))
sample_sizes <-
as.data.frame(t(as.data.frame(sample_sizes))) %>%
rownames_to_column("coefName") %>%
rename(mean_estimate = V1)
# clean model component names
# NOTE: these labels are attached purely by position via bind_cols() below,
# so this vector must stay in sync with the row order of the bound tables.
mod_comp_names <-
data.frame(comp_name = c("Intercept",
"Linear age",
"Quadratic age",
"First age",
"Last age",
"Linear lay date",
"Quadratic lay date",
"Polyandry",
"Nest / Individual",
"Individual",
"Year",
"Residual",
"Nest / Individual",
"Individual",
"Year",
"Residual",
"Egg volume at peak",
"Age at peak",
"Pre-peak age effect",
"Post-peak age effect",
"Years",
"Individuals",
"Nests",
"Observations (i.e., Eggs)"))
# Store all parameters into a single table and clean it up
allCoefs_mod <-
bind_rows(fixefTable,
ranefTable,
resTable,
coefRptTable,
coefYpeakTable,
coefXpeakTable) %>%
rename(lower95 = `2.5%`,
upper95 = `97.5%`,
mean_estimate = V1) %>%
bind_rows(.,
coefPrePeakAgeTable,
coefPostPeakAgeTable,
sample_sizes) %>%
bind_cols(.,
mod_comp_names) %>%
# back-transform the age at peak estimate to the correct age at first reproduction
mutate(mean_estimate = ifelse(coefName == "Age at peak", mean_estimate + 1, mean_estimate),
lower95 = ifelse(coefName == "Age at peak", lower95 + 1, lower95),
upper95 = ifelse(coefName == "Age at peak", upper95 + 1, upper95)) %>%
mutate(coefString = ifelse(!is.na(lower95),
paste0("[",
round(lower95, 2), ", ",
round(upper95, 2), "]"),
NA),
effect = c(rep("Fixed effects \U1D6FD", nrow(fixefTable)),
rep("Random effects \U1D70E\U00B2", nrow(ranefTable)),
rep("Random effects \U1D70E\U00B2", nrow(resTable)),
rep("Adjusted Repeatability \U1D45F", nrow(coefRptTable)),
rep("Peak Performance \U1D6FD", nrow(coefYpeakTable)),
rep("Peak Performance \U1D6FD", nrow(coefXpeakTable)),
rep("Pre-/post-peak analysis \U1D6FD", nrow(coefPrePeakAgeTable)),
rep("Pre-/post-peak analysis \U1D6FD", nrow(coefPostPeakAgeTable)),
rep("Sample sizes \U1D45B", nrow(sample_sizes)))) %>%
dplyr::select(effect, everything())
# re-organize model components for table
# (swaps the Nest/Individual rows within the random-effect and
# repeatability sections so the display order matches the manuscript)
allCoefs_mod <-
allCoefs_mod[c(c(1:8), 10, 9, 11, 12, 14, 13, 15, 16, c(17:24)), ]Wrangle predicted values for each iteration of the simulation proceedure (for visual purposes)
# expand new data across the 5000 simulations for binding to predictions:
# each of the n_sim iterations gets its own full copy of the prediction grid
new_data_out_rm_expanded <-
  data.frame(new_data_out_rm)[rep(seq_len(nrow(data.frame(new_data_out_rm))), n_sim), ]
# wrangle predicted values of simulation for plotting
predicted_values <-
  # melt the predictions and peaks of the simulation and merge each dataframe.
  # NOTE(review): assumes melt() yields reshape2's default Var1/Var2/value
  # columns with Var2 indexing the simulation iteration — confirm against the
  # structure of bs.predictions_out_rm
  left_join(melt(bs.predictions_out_rm$predictions),
            data.frame(value = melt(bs.predictions_out_rm$peaks),
                       Var2 = c(1:n_sim)),
            by = "Var2") %>%
  # add the new_data_out_rm levels to each iteration's vector of predictions
  # (bind_cols relies on identical row order between the joined predictions
  # and the expanded grid — no key is checked here)
  bind_cols(., new_data_out_rm_expanded) %>%
  # rename columns: value.x/value.y are the join suffixes of the two `value`
  # columns (predicted egg volume and the iteration's peak age, respectively)
  rename(iteration = Var2,
         eggv = value.x,
         peak_age = value.y) %>%
  # remove unneeded columns (Var1 is melt's within-iteration row index)
  dplyr::select(-Var1) %>%
  # filter dataframe so that each iteration only contains pre- and post-peak
  # predictions that are within the peak: pre-peak rows (PrePeak_mod == "0")
  # keep ages up to the iteration's (ceiling of) peak age, post-peak rows
  # (PrePeak_mod == "1") keep ages strictly beyond it
  filter((PrePeak_mod == "0" & est_age < (ceiling(peak_age) + 1)) |
           (PrePeak_mod == "1" & est_age > ceiling(peak_age)))
# Store model output
# Bundle the model's outputs — posterior simulations, peak-analysis
# predictions, and the cleaned summary table — into a single named list
# for downstream saving/plotting.
results_mod6_out_rm <- list(
  mod6_out_rm_posteriors               = mod6_out_rm_sim,
  mod6_out_rm_peak_analysis_predictions = predicted_values,
  mod6_out_rm_summary_table            = allCoefs_mod
)
# Produce table of effect sizes in a similar layout to that shown in
# Table 1 of Dingemanse et al. JAE 2020 (DOI: 10.1111/1365-2656.13122)
Run the model with “poly(est_age, 2, raw = TRUE)” instead of “est_age + I(est_age^2)” to make plotting the trend line easier
# Refit the egg-volume model with poly(est_age, 2, raw = TRUE) instead of
# est_age + I(est_age^2) so the quadratic age (and lay-date) effects can be
# extracted as single terms for plotting. The formula's exact term text is
# referenced by string in the effect() calls below — do not reformat it.
# Random intercepts: nest nested within individual (ring/ID), plus year.
mod6_out_rma <-
  lmer(eggv ~ poly(est_age, 2, raw = TRUE) + firstage + lastage +
       poly(jul_std_date, 2, raw = TRUE) + polyandry +
       (1|ring/ID) + (1|year),
       data = eggdf_out_rm)
# Extract the fitted values of the polynomial age effect, evaluated at
# every integer age from the youngest to the oldest observed age.
mod6_out_rma_age_fits <-
  local({
    age_limits <- range(eggdf_out_rm$est_age)
    as.data.frame(
      effect("poly(est_age, 2, raw = TRUE)", mod6_out_rma,
             xlevels = list(est_age = seq(from = age_limits[1],
                                          to = age_limits[2],
                                          by = 1))))
  })
# Extract the fitted values of the polynomial season (lay date) effect,
# evaluated on a 0.1-step grid spanning the observed standardised dates.
mod6_out_rma_season_fits <-
  local({
    date_limits <- range(eggdf_out_rm$jul_std_date)
    as.data.frame(
      effect("poly(jul_std_date, 2, raw = TRUE)", mod6_out_rma,
             xlevels = list(jul_std_date = seq(from = date_limits[1],
                                               to = date_limits[2],
                                               by = 0.1))))
  })
Here we show the overall results of the analysis. The black line shows the overall quadratic trend of egg volume with age while controlling for selective appearance and disappearance. The orange lines show the predicted pre-peak trends for each run of the simulation, while the green lines show the same for the post-peak trend. The points in the plot visualise the raw data, with each point being a single egg. The density plot on the top of the figure illustrates the spread of the ‘age at peak performance’ captured by our simulation procedure. The vertical dashed line represents the median age of this posterior distribution.
Here we visualize the effect sizes for all parameters estimated in our model (95% credible intervals are shown around each mean estimate).
Display session information to enhance reproducibility in light of version-dependency
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United Kingdom.1252
## [2] LC_CTYPE=English_United Kingdom.1252
## [3] LC_MONETARY=English_United Kingdom.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United Kingdom.1252
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] broom_0.5.2 standardize_0.2.1 jpeg_0.1-8.1
## [4] extrafont_0.17 ggpubr_0.2 magrittr_1.5
## [7] magick_2.3 performance_0.4.4 GGally_2.0.0
## [10] MuMIn_1.43.17 lubridate_1.7.4 kableExtra_1.1.0
## [13] snowfall_1.84-6.1 snow_0.4-3 BaSTA_1.9.4
## [16] RSQLite_2.2.0 cowplot_1.0.0 gt_0.2.1
## [19] rmarkdown_2.1 gridExtra_2.3 reshape2_1.4.4
## [22] RColorBrewer_1.1-2 arm_1.11-1 MASS_7.3-51.1
## [25] coefplot_1.2.6 effects_4.1-4 carData_3.0-2
## [28] rmcorr_0.3.1 lme4_1.1-23 Matrix_1.2-15
## [31] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.5
## [34] purrr_0.3.3 tidyr_1.0.0 tibble_3.0.1
## [37] ggplot2_3.3.0 tidyverse_1.3.0 readr_1.3.1
##
## loaded via a namespace (and not attached):
## [1] useful_1.2.6 minqa_1.2.4 colorspace_1.4-0
## [4] ellipsis_0.3.0 estimability_1.3 fs_1.4.1
## [7] rstudioapi_0.10 farver_2.0.3 bit64_0.9-7
## [10] xml2_1.2.2 splines_3.5.2 knitr_1.25
## [13] jsonlite_1.6 nloptr_1.2.1 Rttf2pt1_1.3.8
## [16] dbplyr_1.4.2 png_0.1-7 compiler_3.5.2
## [19] httr_1.4.1 backports_1.1.3 assertthat_0.2.0
## [22] survey_3.37 cli_1.1.0 htmltools_0.4.0
## [25] tools_3.5.2 coda_0.19-2 gtable_0.2.0
## [28] glue_1.4.1 Rcpp_1.0.2 cellranger_1.1.0
## [31] vctrs_0.3.0 nlme_3.1-137 extrafontdb_1.0
## [34] insight_0.8.1 xfun_0.14 Rmisc_1.5
## [37] rvest_0.3.5 lifecycle_0.2.0 statmod_1.4.34
## [40] scales_1.1.1 hms_0.5.2 parallel_3.5.2
## [43] yaml_2.2.0 memoise_1.1.0 sass_0.2.0
## [46] reshape_0.8.8 stringi_1.2.4 highr_0.7
## [49] bayestestR_0.5.2 checkmate_2.0.0 boot_1.3-20
## [52] epuRate_0.1 commonmark_1.7 rlang_0.4.6
## [55] pkgconfig_2.0.2 evaluate_0.14 lattice_0.20-38
## [58] labeling_0.3 bit_1.1-14 tidyselect_1.1.0
## [61] plyr_1.8.4 R6_2.3.0 generics_0.0.2
## [64] DBI_1.1.0 pillar_1.4.4 haven_2.2.0
## [67] withr_2.1.2 survival_2.43-3 abind_1.4-5
## [70] nnet_7.3-12 modelr_0.1.5 crayon_1.3.4
## [73] readxl_1.3.1 blob_1.2.0 reprex_0.3.0
## [76] digest_0.6.18 webshot_0.5.1 stats4_3.5.2
## [79] munsell_0.5.0 viridisLite_0.3.0 mitools_2.4
HTML document made with epuRate by Yan Holtz.
A document by Lourenço Falcao Rodrigues, Anne Hertel, Luke Eberhart-Phillips